#load data#
load("~/Desktop/thesis_data/wv6_2010-2014/WV6_Data_r_v_2015_04_18.rdata")
wv6 <- WV6_Data_v_2015_04_18
library(plyr)
library(Matrix)
library(Rcpp)
library(lme4)
library(QMSS)
library(doBy)
library(vcd)
library(lme4)
library(ggplot2)
library(usdm)
library(xlsx)

#preprocess data
wv6 <- wv6 [ which(wv6$V105>0),]
wv6 <- wv6 [ which(wv6$V102>0),]
wv6 <- wv6 [ which(wv6$V103>0),]
wv6 <- wv6 [ which(wv6$V104>0),]
wv6 <- wv6 [ which(wv6$V240>0),]
wv6 <- wv6 [ which(wv6$V57>0),]
wv6 <- wv6 [ which(wv6$V23>0),]
wv6 <- wv6 [ which(wv6$V10<5),]
wv6 <- wv6 [ which(wv6$V10>0),]
wv6 <- wv6 [ which(wv6$V96>0),]
wv6 <- wv6 [ which(wv6$V11>0),]
wv6 <- wv6 [ which(wv6$V239>0),]
wv6 <- wv6 [ which(wv6$V248>0),]
wv6 <- wv6 [ which(wv6$V242>0),]
wv6 <- wv6 [ which(wv6$V147>0),]
wv6 <- wv6 [ which(wv6$V229>0),]
wv6$V10 <- 5-wv6$V10
wv6$V102 <- 5-wv6$V102
wv6$V105 <- 5-wv6$V105
wv6$V103 <- 5-wv6$V103
wv6$V104 <- 5-wv6$V104
wv6$V11 <- 5-wv6$V11
wv6$par_trust <-with(wv6,(V103+V104)/2)
wv6 = rename(wv6, c("V102"="fam_trust"))
wv6 = rename(wv6, c("V105"="gen_trust"))
wv6$married=ifelse(wv6$V57==1, 1,0)
wv6$male <- ifelse(wv6$V240==1,1,0)
wv6$V2[wv6$V2==156] <- "China"
wv6$V2[wv6$V2==392] <- "Japan"
wv6$V2[wv6$V2==410] <- "South Korea"
wv6$V2[wv6$V2==840] <- "US"
wv6$V2[wv6$V2==276] <- "Germany"
wv6$V2[wv6$V2==400] <- "Jordan"
wv6$V2[wv6$V2==752] <- "Sweden"
wv6$V2[wv6$V2==112] <- "Belarus"
wv6$V2[wv6$V2==12] <- "Algeria"
wv6$V2[wv6$V2==152] <- "Chile"
wv6$V2[wv6$V2==158] <- "Taiwan"
wv6$V2[wv6$V2==170] <- "Colombia"
wv6$V2[wv6$V2==196] <- "Cyprus"
wv6$V2[wv6$V2==218] <- "Ecuador"
wv6$V2[wv6$V2==233] <- "Estonia"
wv6$V2[wv6$V2==268] <- "Georgia"
wv6$V2[wv6$V2==275] <- "Palestine"
wv6$V2[wv6$V2==288] <- "Ghana"
wv6$V2[wv6$V2==31] <- "Azerbaijan"
wv6$V2[wv6$V2==32] <- "Argentina"
wv6$V2[wv6$V2==344] <- "Hong Kong"
wv6$V2[wv6$V2==356] <- "India"
wv6$V2[wv6$V2==36] <- "Australia"
wv6$V2[wv6$V2==368] <- "Iraq"
wv6$V2[wv6$V2==398] <- "Kazakhstan"
wv6$V2[wv6$V2==414] <- "Kuwait"
wv6$V2[wv6$V2==417] <- "Kyrgyzstan"
wv6$V2[wv6$V2==422] <- "Lebanon"
wv6$V2[wv6$V2==434] <- "Libya"
wv6$V2[wv6$V2==458] <- "Malaysia"
wv6$V2[wv6$V2==48] <- "Bahrain"
wv6$V2[wv6$V2==484] <- "Mexico"
wv6$V2[wv6$V2==504] <- "Morocco"
wv6$V2[wv6$V2==51] <- "Armenia"
wv6$V2[wv6$V2==528] <- "Netherlands"
wv6$V2[wv6$V2==566] <- "Nigeria"
wv6$V2[wv6$V2==586] <- "Pakistan"
wv6$V2[wv6$V2==604] <- "Peru"
wv6$V2[wv6$V2==608] <- "Philippines"
wv6$V2[wv6$V2==616] <- "Poland"
wv6$V2[wv6$V2==634] <- "Qatar"
wv6$V2[wv6$V2==642] <- "Romania"
wv6$V2[wv6$V2==643] <- "Russia"
wv6$V2[wv6$V2==646] <- "Rwanda"
wv6$V2[wv6$V2==702] <- "Singapore"
wv6$V2[wv6$V2==705] <- "Slovenia"
wv6$V2[wv6$V2==710] <- "South Africa"
wv6$V2[wv6$V2==716] <- "Zimbabwe"
wv6$V2[wv6$V2==724] <- "Spain"
wv6$V2[wv6$V2==76] <- "Brazil"
wv6$V2[wv6$V2==764] <- "Thailand"
wv6$V2[wv6$V2==780] <- "Trinidad and Tobago"
wv6$V2[wv6$V2==788] <- "Tunisia"
wv6$V2[wv6$V2==792] <- "Turkey"
wv6$V2[wv6$V2==804] <- "Ukraine"
wv6$V2[wv6$V2==818] <- "Egypt"
wv6$V2[wv6$V2==858] <- "Uruguay"
wv6$V2[wv6$V2==860] <- "Uzbekistan"
wv6$V2[wv6$V2==887] <- "Yemen"
wv6 = rename(wv6, c("V2"="country"))
wv6 = rename(wv6, c("V10"="happy"))
wv6 = rename(wv6, c("V11"="health"))
wv6 = rename(wv6, c("V23"="satlife"))
wv6 = rename(wv6, c("V239"="income"))
wv6 = rename(wv6, c("V147"="religious"))
wv6 = rename(wv6, c("V248"="education"))
wv6 = rename(wv6, c("V229"="employment"))
table(wv6$China)
###add country-level variables
mydata <- read.xlsx("~/Desktop/country_mac_data.xlsx", sheetName = "sheet")
wv6 <- merge(wv6,mydata,by="country")
wv6<-ddply(wv6,"country",mutate,sum_trust=(fam_trust+par_trust+gen_trust)/3)
wv6$sum_trust <- (wv6$fam_trust+wv6$par_trust+wv6$gen_trust)/3
wv6$ln.GDP <- log(wv6$GDP.per.cap)
wv6$Gini <- wv6$GINI.Index/100
wv6<-ddply(wv6,"country",mutate,Gini)
wv6<-ddply(wv6,"country",mutate,ln.GDP)
with(wv6,by(sum_trust,income,summary))

#descriptive analysis
describe(wv6$V105)
summaryBy(happy~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(satlife~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(fam_trust~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(par_trust~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(gen_trust~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(sum_trust~country, data=wv6, FUN=c(mean,sd), na.rm=T)
summaryBy(health~country, data=wv6, FUN=c(mean,sd), na.rm=T)
##correlation of trust
cor.vars <- c("V102","V103","V104","V105")
cor.vars <- c("V102","V103","V104","V105")
cor.dat<-by.year.ts(,corvars)
data<- data.matrix(cor.vars,cor.vars)
cor(data)
?table
ctab <- table(wv6$V102,wv6$V103)
ctab <- table(wv6$V102,wv6$V102)
chisq.test(ctab)
addmargins(ctab)
ctab2 <- table(wv6$V102,wv6$V104)
chisq.test(ctab2)
ctab3 <- table(wv6$V102,wv6$V105)
chisq.test(ctab3)
ctab4 <- table(wv6$V103,wv6$V104)
chisq.test(ctab4)
(ct <- corresp(~ V102 + V104, data = wv6))
(ct <- corresp(~ V104 + V105, data = wv6))
(ct <- corresp(~ V102 + V105, data = wv6))
(ct <- corresp(~ V105 + V104, data = wv6))
chi2 = chisq.test(ctab2, correct=F)
chi2 = chisq.test(ctab3, correct=F)
chi2 = chisq.test(ctab4, correct=F)
chi2 = chisq.test(ctab, correct=F)
c(chi2$statistic, chi2$p.value)
sqrt(chi2$statistic / sum(ctab2))
sqrt(chi2$statistic / sum(ctab3))

#simple OLS
lm1 <- lm(satlife ~ fam_trust+par_trust+gen_trust+married+male+education+income+health+as.factor(religious)+as.factor(employment)+ln.GDP+Gini,data=wv6)
summary(lm1)
lm2 <- lm(happy ~ fam_trust+par_trust+gen_trust+married+male+education+income+health+as.factor(religious)+as.factor(employment)+ln.GDP+Gini,data=wv6)
summary(lm2)

#ordinal logistic regression of 57 countries
vglm1 <- vglm(as.numeric(happy) ~ fam_trust+par_trust+gen_trust+married+male+education+income+health+as.factor(religious)+as.factor(employment),subset=country=="Trinidad and Tobago",data=wv6,family = propodds)
summary(vglm1)
vglm2 <- vglm(as.numeric(satlife) ~ fam_trust+par_trust+gen_trust+married+male+education+income+health+as.factor(religious)+as.factor(employment),data=wv6,family = propodds)
summary(vglm2)

#multilevel analysis
nullmodel <- lmer(happy ~ (1|country),data=wv6,REML=FALSE)
summary(nullmodel)
##empty random intercept
rho(nullmodel)
countries <- unique(wv6$country)
regs <- sapply (
  X = 1:length(countries),
  FUN <- function(i) {
    lm <- lm(happy ~ fam_trust+par_trust+gen_trust+married+health+income+married+male,data=wv6,country==countries[i])
    c(coef(lm)[1:2],mean(lm$fitted))
  })
rownames(regs)=c("b_const","b_fam_trust","mean.fitted")
overall<-round (rowMeans(regs,na.rm=T),2)
overall
wv6$country
## plot the above
regs.df <- data.frame(t(regs))
no_legend<-theme(legend.position="none")
g_fam_trust <- ggplot(regs.df,aes(x=b_fam_trust,y=mean.fitted,color=factor(mean.fitted)))
g_fam_trust<-g_fam_trust+no_legend
g_fam_trust+geom_point()
mean_point<-annotate("point",x=overall[2],y=overall[3],color="navyblue",size=8)
g_fam_trust+geom_point()+mean_point
##plot for par_trust
regs <- sapply (
  X = 1:length(countries),
  FUN <- function(i) {
    lm <- lm(happy ~ par_trust+fam_trust+gen_trust+married+health+income+married+male,data=wv6,country==countries[i])
    c(coef(lm)[1:2],mean(lm$fitted))
  })
rownames(regs)=c("b_const","b_par_trust","mean.fitted")
overall<-round (rowMeans(regs,na.rm=T),2)
overall
regs.df <- data.frame(t(regs))
no_legend<-theme(legend.position="none")
g_par_trust <- ggplot(regs.df,aes(x=b_par_trust,y=mean.fitted,color=factor(mean.fitted)))
g_par_trust<-g_par_trust+no_legend
g_par_trust+geom_point()
mean_point<-annotate("point",x=overall[2],y=overall[3],color="navyblue",size=8)
g_par_trust+geom_point()+mean_point
##plot for gen_trust
regs <- sapply (
  X = 1:length(countries),
  FUN <- function(i) {
    lm <- lm(happy ~ gen_trust+par_trust+fam_trust+gen_trust+married+health,data=wv6,country==countries[i])
    c(coef(lm)[1:2],mean(lm$fitted))
  })
rownames(regs)=c("b_const","b_gen_trust","mean.fitted")
overall<-round (rowMeans(regs,na.rm=T),2)
overall
regs.df <- data.frame(t(regs))
no_legend<-theme(legend.position="none")
g_gen_trust <- ggplot(regs.df,aes(x=b_gen_trust,y=mean.fitted,color=factor(mean.fitted)))
g_gen_trust<-g_gen_trust+no_legend
g_gen_trust+geom_point()
mean_point<-annotate("point",x=overall[2],y=overall[3],color="navyblue",size=8)
g_gen_trust+geom_point()+mean_point
## random slopes
lmer1<- lmer(happy ~ fam_trust+par_trust+gen_trust+married+male+health+income+(1+gen_trust|country),data=wv6,REML=FALSE)
summary(lmer1)
###Graph: correlation between coef of trust and intercept for each country
fam_trust_vs_const <- ggplot (regs.df,aes(x=b_fam_trust,y=b_const))+no_legend
fam_trust_vs_const+geom_point(aes(color=factor(b_const)))+stat_smooth(method="lm",se=F)
par_trust_vs_const <- ggplot (regs.df,aes(x=b_par_trust,y=b_const))+no_legend
par_trust_vs_const+geom_point(aes(color=factor(b_const)))+stat_smooth(method="lm",se=F)
gen_trust_vs_const <- ggplot (regs.df,aes(x=b_gen_trust,y=b_const))+no_legend
gen_trust_vs_const+geom_point(aes(color=factor(b_const)))+stat_smooth(method="lm",se=F)
###Compare random intercept vs. random intercept+slope
anova(lmer1,lmer2)
###Add cross-national interactions
lmer2<- lmer(satlife ~ ln.GDP*par_trust+ln.GDP*fam_trust+married+male+health+income+education+(1+fam_trust+par_trust|country),data=wv6,REML=FALSE)
summary(lmer2)
lm2 <- lm(satlife ~ Gini*agg_trust+married+male+education+income+health,data=wv6)
summary(lm2)

##discussion
###collinearity
wv6$fam_trust
wv6<-na.omit(wv6)
dummy_df <- data.frame(wv6$fam_trust, wv6$par_trust,wv6$gen_trust,wv6$married,wv6$health)
dummy_df3 <- data.frame(wv6$fam_trust, wv6$par_trust,wv6$gen_trust,wv6$married,wv6$male,wv6$health,wv6$income,wv6$education,wv6$ln.GDP,wv6$Gini)
vifcor(dummy_df3,th=0.9)
vifstep(dummy_df3,th=10)


